# R version 4.4.1
library(tidyr) # version 1.3.1
library(dplyr, warn.conflicts = FALSE) # version 1.1.4
library(ggplot2) # version 3.5.1
library(broom) # version 1.0.6
library(MASS, warn.conflicts = FALSE) # version 7.3-60.2

########################
# Import and clean data
########################
tweets <- read.csv("./data/covid_textless_tweets.csv")
users <- read.csv('./data/covid_twitter_users.csv')

users <- users[complete.cases(users),]
users <- users[users['tweets'] >= 1 & users['Rhat'] <= 1.1,]
tweets$id10 <- as.numeric(substr(tweets$user, 1, 10))
tweets_sample <- tweets[tweets$id10 %in% users$id10,]
tweets_sample <- left_join(tweets_sample, users[,c('id10', 'ideology')], by = 'id10')

users$pct_black <- users$nhb/users$population
users$pct_asn <- users$asian/users$population
users$pct_hisp <- users$hisp/users$population
users$rep_share <- users$rep_votes/users$totalvotes
users$infect_rate <- users$cases/users$population
users$death_rate <- users$deaths/users$population
user_level_data <- tweets_sample %>%
  group_by(id10) %>%                       
  summarize(non_comp_sum = sum(non_comp, na.rm = T),
            debate_sum = sum(debate, na.rm = T))

users_debate <- merge(users, user_level_data, by = "id10", all.y = T)

###########
# Modeling
###########

## Orginal model in Block Jr. et al. (2022) ----

fit_death4 <- glm.nb(non_comp_sum ~ 
                       # Ideology and Death
                       ideology +
                       death_rate + 
                       ideology*death_rate +
                       
                       # Offset
                       offset(log(tweets)) +
                       
                       # Age, education, density
                       p_65_up +
                       pbach_grad +
                       POPPCT_URBAN +
                       
                       # Race
                       pct_hisp +
                       pct_black +
                       pct_asn +
                       
                       # Income
                       log(med_inc) +
                       
                       #Politics
                       rep_share +
                       
                       # Fixed Effects
                       state,
                     
                     data = users_debate)

## DEBATE model ----

fit_death4_d <- glm.nb(debate_sum~ 
                       # Ideology and Death
                       ideology +
                       death_rate + 
                       ideology*death_rate +
                       
                       # Offset
                       offset(log(tweets)) +
                       
                       # Age, education, density
                       p_65_up +
                       pbach_grad +
                       POPPCT_URBAN +
                       
                       # Race
                       pct_hisp +
                       pct_black +
                       pct_asn +
                       
                       # Income
                       log(med_inc) +
                       
                       #Politics
                       rep_share +
                       
                       # Fixed Effects
                       state,
                     
                     data = users_debate)

####################
# Coefficient plot
####################

fit_death4_tidy <- tidy(fit_death4, conf.int = T)
fit_death4_tidy <- fit_death4_tidy[c(2:3, nrow(fit_death4_tidy)), ]
fit_death4_tidy <- fit_death4_tidy |>
  mutate(model = "Block Jr. et al. (2022)")

fit_death4_d_tidy <- tidy(fit_death4_d, conf.int = T)
fit_death4_d_tidy <- fit_death4_d_tidy[c(2:3, nrow(fit_death4_d_tidy)), ]
fit_death4_d_tidy <- fit_death4_d_tidy |>
  mutate(model = "DEBATE model")

all_models <- bind_rows(fit_death4_tidy,fit_death4_d_tidy)
all_models[all_models == "ideology"] <- "Ideology"
all_models[all_models == "death_rate"] <- "Death Rate"
all_models[all_models == "ideology:death_rate"] <- "Ideology:Death Rate"

all_models <- all_models %>%
  mutate(label = paste0(round(estimate, 1), " (", formatC(p.value, format = "f", digits = 2), ")"))

c <- all_models |>
  mutate(term = factor(term, levels = c("Ideology:Death Rate", "Death Rate", "Ideology"))) |>
  ggplot(aes(estimate, term, color = model)) +
  geom_point(position = position_dodge(width = 0.8)) +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), linewidth = 1, height = 0.4, position = position_dodge(width = 0.8)) +
  scale_color_manual(values =c("#0173B2", "#DE8F05")) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_text(aes(label = label, group = model),
            color = 'black',
            position = position_dodge(width = 0.9), 
            vjust = -0.9, size = 5, show.legend = FALSE, hjust = -.025) + 
  labs( x = 'Estimates', y = "") +
  theme_classic() +
  theme(text = element_text(family = "Verdana"),
        axis.text = element_text(size=12, color = 'black'),
        axis.title = element_text(size=15),
        legend.text = element_text(size=15),
        legend.title = element_blank(),
        legend.position = 'bottom') +
  guides(color = guide_legend( override.aes = list(size = 5)))

#c
ggsave("./figures/figure_5.png", plot = c, dpi = 300, width = 8, height = 4)
